global root_dir = "`1'"

include "$root_dir/code/config/config.do"

cap noi log using ${log_dir}/figure_A6_ipc4_histograms.log, replace name(fig)

capture noi {


    * Import clasification file, restrict to machinery & total >= 100, output ipc4 histgorams
    clear

    *get the treshholds

    import delim ${classification_dir}/V6/ipc6XX_tf.csv, varnames(1) clear
    * a) identify machinery technical fields 
    gen machinery_field = techn_sector == "Mechanical engineering" & (techn_field == "Handling" ///
        | techn_field == "Machine tools" | techn_field == "Other special machines" ///
        | techn_field == "Textile and paper machines")
    gen ipc3 = substr(ipc6, 1, 3)
    gen ipc4 = substr(ipc6, 1, 4)
    replace machinery_field = 0 if ipc3 == "F41" | ipc3 == "F42"
    replace machinery_field = 1 if ipc4 == "B42C"
    replace machinery_field = 1 if ipc4 == "B07C"
    replace machinery_field = 1 if ipc6 == "G05B19"
    replace machinery_field = 1 if ipc6 == "G05B2219"
    replace machinery_field = 1 if ipc6 == "B62D65"
    gen ipc1 = substr(ipc6, 1, 1)
    assert machinery_field == 0 if ipc1 == "Y"
    tab techn_field if machinery_field == 1
    * Drop non-machinery technical fields descriptions
    replace techn_field = "non-machinery" if machinery_field == 0
    replace techn_field = "non-classified" if ipc1 == "Y"
    drop ipc1 ipc3 ipc4

    cap log using ${numb_dir}/figure_A6_ipc4_histograms_numbers.log, replace name(numbers)
    * mark auto95 and auto90 based on non-y machinery codes with n>= 100 and get thresholds

    *again, slightly different from python, down to the way percentiles are calculated
    _pctile share_anyclassification if machinery == 1 & total >= 100, p(95) 
    return list 
    local tresh_auto95 = r(r1)
    di "Treshhold auto95: `tresh_auto95'"
    local text_tauto95 = `tresh_auto95' - .03
    local tresh_auto95_r: di %4.2f `tresh_auto95'

    _pctile share_anyclassification if machinery == 1 & total >= 100, p(90) 
    return list
    local tresh_auto90 = r(r1)
    di "Treshhold auto90: `tresh_auto90'"
    local text_tauto90 = `tresh_auto90' - .03
    local tresh_auto90_r: di %4.2f `tresh_auto90'

    cap log close numbers
    qui include ${code_dir}/config/figuretools.do

    * output histograms for IPC4 + G05 / G06

    import delim ${classification_dir}/V6/codes_auto95_ipc4_n.csv, varnames(1) clear

    tw (histogram share if share < `tresh_auto90_r', start(0) width(0.01) lcolor(white) fcolor(gs8) lwidth(0.25pt) freq) ///
    (histogram share if (share >= `tresh_auto90_r') & (share < 0.4801012213485026), start(`tresh_auto90_r') width(0.01) lcolor(white) fcolor("`crm6'") lwidth(0.25pt) freq) ///
    (histogram share if share >= 0.4801012213485026, start(0.4801012213485026) width(0.01) lcolor(white) lwidth(0.25pt) fcolor("`crm4'") freq), /// 
    xtitle("Prevalence of automation keywords") ytitle("Frequency") ///
    legend(off) yscale(r(0 .)) ///
    text(1.3 .4 "Auto90", color("`crm6'") fcolor(white) bexpand j(left)) text(1.3 .74 "Auto95", color("`crm4'")) ///
    plotregion(margin(b=0 l=0)) ylab(#3) ///
    xline(`tresh_auto90_r' `tresh_auto95_r', lpattern(dash) lcolor(black))
    graph export ${fig_dir}/appendix/Figure_A6a_ipc4_histogram.pdf, replace
    graph export ${fig_dir}/appendix/Figure_A6a_ipc4_histogram.eps, replace

    * IPC4 pairs
    import delim ${classification_dir}/V6/codes_auto95_ipc4_pairs_n.csv, varnames(1) clear

    *throw out codes that python left in there mistakenly (F41/F42 codes), only an issue for the pairs
    gen pair = ipc1 + ipc2
    drop if pair == "F41HB32B" | pair == "B60RF42B" | pair == "G01SF41G" | pair == "F41GG02B"
    drop pair


    tw (histogram share if share < `tresh_auto90_r', start(0) width(0.01) lcolor(white) fcolor(gs8) lwidth(0.25pt) freq) ///
    (histogram share if (share >= `tresh_auto90_r') & (share < 0.4801012213485026), start(`tresh_auto90_r') width(0.01) lcolor(white) fcolor("`crm6'") lwidth(0.25pt) freq) ///
    (histogram share if share >= 0.4801012213485026, start(0.4801012213485026) width(0.01) lcolor(white) lwidth(0.25pt) fcolor("`crm4'") freq), /// 
    xtitle("Prevalence of automation keywords") ytitle("Frequency") ///
    legend(off) yscale(r(0 .)) ///
    text(25 .46 "Auto90", color("`crm6'") fcolor(white) bexpand j(left)) text(25 .7 "Auto95", color("`crm4'")) ///
    plotregion(margin(b=0 l=0)) ylab(#3) ///
    xline(`tresh_auto90_r' `tresh_auto95_r', lpattern(dash) lcolor(black))
    graph export ${fig_dir}/appendix/Figure_A6b_ipc4_pairs_histogram.pdf, replace
    graph export ${fig_dir}/appendix/Figure_A6b_ipc4_pairs_histogram.eps, replace


}
if _rc == 0 {
    display "Execution finished successfully."
}
else {
    display "Execution finished with errors."
}

cap log close fig